import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Directory holding one "<TICKER>_data.csv" file per company.
path = 'E:/AA DS TM/2-Time Series Data Analysis/individual_stocks_5yr'
company_list = ['AAPL_data.csv', 'GOOG_data.csv', 'MSFT_data.csv', 'AMZN_data.csv']

# Read every company's CSV, then stack them into one long frame with a single
# concat.  Concatenating inside the loop (onto an initially empty frame)
# re-copies all rows read so far on every iteration; collecting the frames
# first avoids that and needs no empty seed frame.
frames = []
for file in company_list:
    curr_df = pd.read_csv(path + '/' + file)
    frames.append(curr_df)
# ignore_index is deliberately left off, so each file keeps its own row labels.
all_data = pd.concat(frames)

# Bare expressions below are notebook-style inspection; they have no effect
# when the file is run as a plain script.
all_data.shape
all_data.head()

# Distinct values of the 'Name' column, in order of first appearance.
tech_list = all_data['Name'].unique()

all_data.dtypes
# Parse the 'date' column into datetimes so it plots on a real time axis.
all_data['date'] = pd.to_datetime(all_data['date'])
all_data.dtypes
# Closing price over time, one panel per company.
# The 2x2 grid has room for four panels, matching the four files loaded above.
plt.figure(figsize=(20, 12))
for i, company in enumerate(tech_list, 1):
    # Subplot positions are 1-based, hence enumerate(..., 1).
    plt.subplot(2, 2, i)
    df = all_data[all_data['Name'] == company]
    plt.plot(df['date'], df['close'])
    plt.xticks(rotation='vertical')
    plt.title(company)
import plotly.express as px
# Traded volume over time: one plotly line chart per company, each shown as
# soon as it is built.
for company in tech_list:
    df = all_data[all_data['Name'] == company]
    fig = px.line(df, x='date', y='volume', title=company)
    fig.show()
# Intraday price movement for Apple, read fresh from its own CSV.
df = pd.read_csv(path + '/AAPL_data.csv')
df.head()

# Absolute move between the open and the close of each row.
df['Daily_Price_change'] = df['close'] - df['open']
df.head()

# Open-to-close move as a percentage of the closing price.
# NOTE(review): a return is more commonly expressed relative to the opening
# price; the denominator is left as written -- confirm which one is intended.
df['1Day % return'] = ((df['close'] - df['open']) / df['close']) * 100
df.head()

# The title used to be the leftover loop variable `company`, i.e. whichever
# ticker the previous loop finished on, not the company plotted here.
fig = px.line(df, x='date', y='1Day % return', title='AAPL')
fig.show()
# Date-indexed copy of df for time-based slicing and resampling; df itself is
# left untouched.
df2 = df.copy()
df2.dtypes
df2['date'] = pd.to_datetime(df2['date'])
df2.set_index('date', inplace=True)
df2.head()

# Label-based row slice on the date index; both end dates are included.
df2.loc['2013-02-08':'2013-02-13']

# Series.plot() draws on the current matplotlib axes when none is given, so
# each chart gets its own figure instead of landing on an earlier plot.
# NOTE(review): pandas 2.2+ deprecates the 'M'/'Y' aliases in favour of
# 'ME'/'YE'; kept as-is for compatibility with older pandas -- confirm the
# target version before changing.
plt.figure()
df2['close'].resample('M').mean().plot()  # mean close per month
plt.figure()
df2['close'].resample('Y').mean().plot(kind='bar')  # mean close per year
# Load each company's price history into its own frame.
aapl = pd.read_csv(path + '/AAPL_data.csv')
aapl.head()
amzn = pd.read_csv(path + '/AMZN_data.csv')
amzn.head()
msft = pd.read_csv(path + '/MSFT_data.csv')
msft.head()
goog = pd.read_csv(path + '/GOOG_data.csv')
goog.head()

# Closing prices side by side, one column per company.
# The columns are aligned on the 'date' column rather than on row position:
# positional alignment only compares like with like when every file covers
# exactly the same trading days, and otherwise pairs prices from different
# dates.  Dates missing from a file show up as NaN in that column.
close = pd.DataFrame({
    'aapl': aapl.set_index('date')['close'],
    'amzn': amzn.set_index('date')['close'],
    'msft': msft.set_index('date')['close'],
    'goog': goog.set_index('date')['close'],
})
close.head()
import seaborn as sns
# Pairwise scatter plots of the closing prices (pairplot builds its own figure).
sns.pairplot(data=close)
# heatmap draws on the current axes, which at this point would be a panel of
# the pairplot grid; open a fresh figure for the correlation matrix.
plt.figure()
sns.heatmap(close.corr(), annot=True)
aapl.head()


def _intraday_pct_change(frame):
    """Return the open-to-close move as a percentage of the close, indexed by 'date'."""
    dated = frame.set_index('date')
    return ((dated['close'] - dated['open']) / dated['close']) * 100


# Daily percentage change per company, aligned on the trading date so that
# each row compares the same day across companies (row position alone does
# not guarantee that when the files cover different date ranges).
data = pd.DataFrame({
    'aapl_change': _intraday_pct_change(aapl),
    'amzn_change': _intraday_pct_change(amzn),
    'msft_change': _intraday_pct_change(msft),
    'goog_change': _intraday_pct_change(goog),
})
data.head()

# Pairwise scatter plots (pairplot builds its own figure).
sns.pairplot(data=data)
# heatmap and distplot draw on the current axes, so give each a fresh figure
# rather than letting them land on the previous chart.
plt.figure()
sns.heatmap(data.corr(), annot=True)
# NOTE(review): distplot is deprecated in seaborn 0.11+ (histplot/displot are
# the replacements); kept because the seaborn version in use is not visible here.
plt.figure()
sns.distplot(data['aapl_change'])

# Spread of AAPL's daily change.  The percentages below are the usual
# rule of thumb for normally distributed data; the ranges are the figures
# noted from the original run.  (The 1-std range used to read "-1.8 to 1.8",
# which does not agree with the 2-std and 3-std figures.)
# ~68% of the data lies within 1 std: roughly -1.19 to 1.19
data['aapl_change'].std()
# ~95% of the data lies within 2 std: -2.37 to 2.37
data['aapl_change'].std()*2
# ~99.7% of the data lies within 3 std: -3.56 to 3.56
data['aapl_change'].std()*3

# 10th percentile: 10% of the daily changes are at or below this value.
data['aapl_change'].quantile(0.1)
data.describe().T